# @author:Wei Junjie
# @time:2024/5/31 16:52
# @file spider_urls.py
# --*--coding: utf-8 --*--
"""
ip 代理池生成
"""
import json

# Imports
from fake_useragent import UserAgent
import requests
import re
# 创建列表收集IP
URL_MAP=[]

# Fetch the full proxy-IP list for a single result page
def get_all_ip(page) -> list:
    """Fetch the free-proxy listing for one page of kuaidaili.com.

    Args:
        page: page number of the free-proxy listing (used in the URL path).

    Returns:
        The list parsed from the page's embedded ``fpsList`` JavaScript
        constant — entries are dicts; verify_ip expects at least ``ip``
        and ``port`` keys.

    Raises:
        requests.RequestException: on network failure, timeout, or a
            non-2xx HTTP status.
        ValueError: if ``fpsList`` is not found (page layout changed).
    """
    headers = {'User-Agent': UserAgent().random}
    # timeout so a stalled connection cannot hang the crawl indefinitely
    response = requests.get(
        f'https://www.kuaidaili.com/free/fps/{page}/',
        headers=headers,
        timeout=10,
    )
    # fail fast on 4xx/5xx instead of regex-searching an error page
    response.raise_for_status()
    # the proxy table is embedded in the page as a JS constant
    match = re.search(r'const fpsList = (.*?);', response.text)
    if match is None:
        # original code would raise an opaque AttributeError here;
        # raise something that names the actual problem instead
        raise ValueError(f'fpsList not found on page {page}')
    return json.loads(match.group(1))
# Verify one page's worth of proxy IPs
def verify_ip(ip_list) -> None:
    """Probe candidate proxies and append working ones to URL_MAP.

    Args:
        ip_list: iterable of dicts with string ``ip`` and ``port`` keys,
            as returned by get_all_ip.

    Side effects:
        Appends each working proxy URL ("http://ip:port") to the
        module-level URL_MAP list. Failing proxies are skipped silently
        (best-effort filtering).
    """
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36'}
    for ip in ip_list:
        temp_ip='http://'+ip['ip']+':'+ip['port']
        try:
            # BUG FIX: the original passed proxies={'http': ...} while
            # probing an https:// URL — requests selects the proxy by the
            # URL scheme, so the request went out *directly* and every
            # candidate "verified". Route both schemes through the proxy,
            # and bound the wait so dead proxies cannot stall the loop.
            response = requests.get(
                'https://www.baidu.com',
                headers=headers,
                proxies={'http': temp_ip, 'https': temp_ip},
                timeout=5,
            )
            if response.status_code == 200:
                URL_MAP.append(temp_ip)
        except Exception:
            # dead/slow/refusing proxy — deliberately skip and try the next
            continue

# Main entry point
def main(start_page, end_page):
    """Crawl pages start_page through end_page (inclusive) and collect
    working proxies into URL_MAP via verify_ip."""
    current = start_page
    while current <= end_page:
        verify_ip(get_all_ip(current))
        current += 1

if __name__ == '__main__':
    pass